<?php

namespace App\Console\Commands;


use App\Models\Items;
use App\Models\ItemsCate;
use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Exception\ClientException;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
use QL\QueryList;


/**
 * Artisan command that crawls the dataoke.com goods list API page by page,
 * enriches every new item with data from the Taobao TBK API and the Tmall
 * mobile detail page, and stores the result through the Items model.
 *
 * Already-crawled goods ids are tracked in a Redis set so that repeated runs
 * only insert new items.
 *
 * NOTE(review): this command reads its settings with env(). Laravel documents
 * that env() returns null outside config files once the configuration is
 * cached — confirm this command is never run with `config:cache` active, or
 * move the values into a config file.
 */
class DataokeGoodsCrawler extends Command
{
    // Number of list pages to request (original note: the default maximum page count is 1454).
    private $totalPageCount = 1;
    // Number of pages that have been settled (fulfilled or rejected), starting at 1.
    private $counter        = 1;
    // Number of list requests sent concurrently.
    private $concurrency    = 7;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'dataokegoods:crawler';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'crawler dataoke.com goods data';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Append a timestamped line to storage/logs/crawler.log.
     *
     * @param string $message text to record
     * @return void
     */
    public function crawler_log($message = "")
    {
        $log = sprintf(
            "%s, [采集]: %s,%s",
            date('Y-m-d H:i:s'),
            $message,
            PHP_EOL
        );
        // LOCK_EX keeps lines from interleaving if several crawlers append at once.
        file_put_contents(storage_path() . '/logs/crawler.log', $log, FILE_APPEND | LOCK_EX);
    }

    /**
     * Execute the console command: fetch every list page concurrently and
     * hand each page's goods to dataClean() for de-duplication and storage.
     *
     * @return mixed null on a normal run, 1 when the API settings are missing
     */
    public function handle()
    {
        $client = new Client();

        // Resolve the settings once instead of once per page.
        $urlTemplate = env('DATAOKE_API_LIST_URL');
        $appKey      = env('DATAOKE_APPKEY');
        if (empty($urlTemplate) || empty($appKey)) {
            $this->error("DATAOKE_API_LIST_URL / DATAOKE_APPKEY is not configured");
            return 1;
        }

        // Lazily yields one request factory per page so the pool only builds
        // a request when a concurrency slot is free.
        $requests = function ($page_total) use ($client, $urlTemplate, $appKey) {
            for ($page = 1; $page <= $page_total; $page++) {
                $request_uri = sprintf($urlTemplate, $appKey, $page);
                yield function () use ($client, $request_uri) {
                    return $client->getAsync($request_uri);
                };
            }
        };

        $pool = new Pool($client, $requests($this->totalPageCount), [
            'concurrency' => $this->concurrency,
            'fulfilled'   => function ($response, $index) {
                $res = json_decode((string) $response->getBody(), true);

                // Goods returned for this page; anything that is not a list is ignored.
                $page_goods_data = $res['result'] ?? [];
                if (is_array($page_goods_data) && !empty($page_goods_data)) {
                    $this->dataClean($page_goods_data);
                }

                $this->countedAndCheckEnded();
            },
            'rejected' => function ($reason, $index) {
                // Report the message only; concatenating the exception object
                // would dump its whole stack trace to the console.
                if ($reason instanceof \Throwable) {
                    $message = $reason->getMessage();
                } else {
                    $message = is_scalar($reason) ? (string) $reason : json_encode($reason);
                }
                $this->error("rejected");
                $this->error("rejected reason: " . $message);
                $this->crawler_log("rejected reason: " . $message);
                $this->countedAndCheckEnded();
            },
        ]);

        // Start sending the requests and block until all of them have settled.
        $promise = $pool->promise();
        $promise->wait();
    }

    /**
     * Count one settled page and announce completion once the last page is in.
     *
     * @return void
     */
    public function countedAndCheckEnded()
    {
        if ($this->counter < $this->totalPageCount) {
            $this->counter++;
            return;
        }
        $this->info("请求结束！");
    }

    /**
     * Fetch the second-level category id, the small images and the shop name
     * of an item from the Taobao TBK item-info API.
     *
     * The category id is resolved by matching the API's leaf category name
     * against ItemsCate titles; when there is no exact match the name is
     * split into words by analysis() and any matching word is accepted.
     *
     * @param mixed $item_id Taobao item id
     * @return array [category leaf id (0 if unknown), small images as JSON ("" if none), shop name ("" if none)]
     */
    public function getOverheadItemInfo($item_id)
    {
        $c = new \TopClient;
        $c->appkey = env("TBK_APPKEY");
        $c->secretKey = env("TBK_SECRET");
        $c->format = "json";
        $req = new \TbkItemInfoGetRequest;
        $req->setNumIids($item_id);
        $resp = $c->execute($req);

        // `??` tolerates a missing/failed response without the `@` operator
        // hiding unrelated errors.
        $item              = $resp->results->n_tbk_item[0] ?? null;
        $cat_leaf_name     = $item->cat_leaf_name ?? null;
        $item_small_images = $item->small_images->string ?? null;
        $item_shop_name    = $item->nick ?? null;

        $return_cate_leaf_id = 0;  // second-level category id
        $return_item_images  = ""; // small images of the item
        $return_shop_name    = ""; // shop name

        // Only look the category up when the API actually returned a name;
        // otherwise the query would match on a null title.
        if (!empty($cat_leaf_name)) {
            $cate = ItemsCate::where("title", $cat_leaf_name)->first();
            if (empty($cate)) {
                // No exact match in the database: segment the API category
                // name into words and try to match any of them instead.
                $blob_cat_leaf = $this->analysis($cat_leaf_name);
                if (!empty($blob_cat_leaf)) {
                    $cate = ItemsCate::whereIn("title", $blob_cat_leaf)->first();
                }
            }
            if (!empty($cate)) {
                $return_cate_leaf_id = $cate->id;
            }
        }

        if (!empty($item_small_images)) {
            $return_item_images = json_encode($item_small_images);
        }
        if (!empty($item_shop_name)) {
            $return_shop_name = $item_shop_name;
        }

        return [$return_cate_leaf_id, $return_item_images, $return_shop_name];
    }

    /**
     * Scrape the detail image URLs from the Tmall mobile detail page.
     *
     * @param mixed $item_taobao_id Taobao item id
     * @return string JSON-encoded list of image URLs, or "" when none could be read
     */
    public function crawler_goods_detail_info($item_taobao_id)
    {
        $ql = QueryList::get("https://detail.m.tmall.com/item.htm?id=$item_taobao_id");
        $images_array = $ql->find('.mui-custommodule.mdv-custommodule:eq(0) .mui-custommodule-item.unloaded img')->attrs('data-ks-lazyload');

        // NOTE(review): QueryList 4 appears to return a Collection from
        // attrs(); unwrap it so the is_array() check below can succeed.
        if (is_object($images_array) && method_exists($images_array, 'all')) {
            $images_array = $images_array->all();
        }

        return is_array($images_array) ? json_encode($images_array) : "";
    }

    /**
     * Word segmentation through the BosonNLP tag/analysis API.
     *
     * Always returns an array so the result can be passed to whereIn()
     * directly; a missing token, an empty title, a transport error or an
     * unexpected response all yield an empty array.
     *
     * @param mixed $title text to segment
     * @return array list of words, possibly empty
     */
    public function analysis($title)
    {
        $token = env('BOSONNLP_TOKEN');
        if (empty($token) || empty($title)) {
            return [];
        }

        try {
            $client = new Client();
            $res = $client->request('POST', 'http://api.bosonnlp.com/tag/analysis', [
                'headers' => [
                    'Content-Type' => 'application/json',
                    'Accept'       => 'application/json',
                    'X-Token'      => $token,
                ],
                'body' => json_encode([$title]),
            ]);
        } catch (\GuzzleHttp\Exception\GuzzleException $e) {
            // Segmentation is only a fallback for category matching, so a
            // failed call must not abort the crawl.
            $this->crawler_log("analysis failed: " . $e->getMessage());
            return [];
        }

        $json  = json_decode((string) $res->getBody(), true);
        $words = $json[0]['word'] ?? [];

        return is_array($words) ? $words : [];
    }

    /**
     * Clean one page of crawled goods: skip items that were already crawled
     * and store the new ones.
     *
     * A goods id is added to the Redis "crawled" set only after the item has
     * been stored, so an item whose enrichment or insert fails is retried on
     * the next run instead of being skipped forever.
     *
     * @param array $ori_data goods rows as returned by the list API
     * @return false|null false when $ori_data is empty, otherwise null
     */
    public function dataClean($ori_data = [])
    {
        if (empty($ori_data)) return false;

        $crawled_key = env('ITEMS_CRAWLERED_COLLECTIONS_REDIS_KEY');

        foreach ($ori_data as $v) {
            $goods_id = $v['GoodsID'] ?? null;
            if (empty($goods_id)) continue;

            // Already crawled: skip.
            if (Redis::sismember($crawled_key, $goods_id)) continue;

            try {
                list($return_cate_leaf_id, $return_item_images, $return_shop_name) = $this->getOverheadItemInfo($goods_id);
                Items::create([
                    'short_title'=>$v['D_title'],
                    'title'=>$v['Title'],
                    'item_description_score'=>$v['Dsr'],
                    'commission_queqiao'=>$v['Commission_queqiao'],
                    'quan_receive'=>$v['Quan_receive'],
                    'quan_price'=>$v['Quan_price'],
                    'yongjin_type'=>$v['Yongjin_type'],
                    'quan_time'=>$v['Quan_time'],
                    'jihua_link'=>$v['Jihua_link'],
                    'price'=>$v['Price'],
                    'jihua_shenhe'=>$v['Jihua_shenhe'],
                    'introduce'=>$v['Introduce'],
                    'cid'=>$v['Cid'],
                    'cate_leaf_id'=>$return_cate_leaf_id,
                    'sales_nums'=>$v['Sales_num'],
                    'quan_link'=>"",
                    'is_tmall'=>$v['IsTmall'],
                    'tb_items_id'=>$goods_id,
                    'commission_jihua'=>$v['Commission_jihua'],
                    'que_siteid'=>(int)$v['Que_siteid'],
                    'commission'=>$v['Price'] * ($v['Commission'] / 100),
                    'pic'=>$v['Pic'],
                    'small_images'=>$return_item_images,
                    'detail_images'=>$this->crawler_goods_detail_info($goods_id),
                    'org_price'=>$v['Org_Price'],
                    'quan_m_link'=>$v['Quan_m_link'],
                    'quan_id'=>$v['Quan_id'],
                    'quan_condition'=>$v['Quan_condition'],
                    'quan_surplus'=>$v['Quan_surplus'],
                    'seller_id'=>$v['SellerID'],
                    'shop_name'=>$return_shop_name,
                ]);

                // Stored successfully: remember the id so later runs skip it.
                Redis::sadd($crawled_key, $goods_id);
            } catch (\Throwable $e) {
                // One bad item must not stop the rest of the page; record it
                // and leave it unmarked so it is retried next time.
                $failure = sprintf("item %s failed: %s", $goods_id, $e->getMessage());
                $this->crawler_log($failure);
                $this->error($failure);
            }

            // Short pause between items to go easy on the upstream services.
            usleep(5000);
        }
    }

}
